#############################################################################################
# LINEAR PROBABILITY MODELS OF RESCUE MODES ON RESCUER HOUSEHOLD TYPE IN SELECTED COUNTRIES #
#############################################################################################
# Author: Kasia Nalewajko
# First created: 7 May 2021
# Replicated: 10 June 2024

rm(list = ls())

# LOAD PACKAGES -----------------------------------------------------------

if (!require("dplyr")) install.packages("dplyr")
if (!require("tidyr")) install.packages("tidyr")
if (!require("stringr")) install.packages("stringr")
if (!require("lmtest")) install.packages("lmtest")
if (!require("sandwich")) install.packages("sandwich")
if (!require("ggplot2")) install.packages("ggplot2")

# LOAD DATA ---------------------------------------------------------------

load("./00 SUBMITTED/00 APSR final/04 replication_files/01 data/RaN.Rda")

# CLEAN DATA ----

ran2 <- ran %>% 
  dplyr::select(file_no, rescuers_no, status, rescuees_no, rescue_country, rescue_loc, rescue_mode) %>% 
  unique()

ran2$hiding <- ifelse(ran2$rescue_mode == "Hiding", 1, 0)
ran2$documents <- ifelse(ran2$rescue_mode == "Providing forged documents", 1, 0)
ran2$goods <- ifelse(ran2$rescue_mode == "Supplying basic goods", 1, 0)
ran2$recruiting <- ifelse(ran2$rescue_mode == "Arranging shelter", 1, 0)
ran2$transfer <- ifelse(ran2$rescue_mode == "Illegal transfer", 1, 0)
ran2$false_evidence <- ifelse(ran2$rescue_mode == "Providing false evidence", 1, 0)
ran2$other <- ifelse(ran2$rescue_mode == "Other", 1, 0)

temp <- ran2 %>% 
  dplyr::group_by(file_no) %>% 
  dplyr::summarise(across(.cols = c(8:14),
                          .fns = ~sum(.)))

temp <- temp %>% 
  mutate(across(.cols = c(2:7),
                .fns = ~ifelse(.>1, 1, .)))

unique_rans <- ran2 %>% 
  dplyr::select(hid, file_no, rescuers_no, status, rescuees_no, rescue_country, rescue_loc) %>% 
  unique()

unique_rans <- left_join(unique_rans, temp, by = "file_no")

only_big <- unique_rans %>% 
  group_by(rescue_country) %>% 
  summarise(rescuers_sum = n()) %>% 
  filter(!is.na(rescue_country) & rescuers_sum > 100) %>% 
  arrange(desc(rescuers_sum))

only_big <- unlist(only_big$rescue_country)

reg <- summary(lm(formula = hiding ~ status + rescue_loc,
                  data = subset(unique_rans, rescue_country == only_big[14])))

# RUN THE MODELS IN A LOOP ---------------------------------------------------------------

# Generate vector of outcomes
outcomes <- c("hiding",
              "recruiting",
              "documents",
              "transfer")

models_df <- data.frame(country = NULL, model = NULL, outcome = NULL, estimate_contact = NULL, std_error_contact = NULL, estimate_resister = NULL, std_error_resister = NULL)

# Loop over outcomes
for(i in 1:length(only_big)){
  for (j in 1:length(outcomes)){
    
    # Models with no controls
    models_list <- summary(lm(as.formula(paste(outcomes[j],"~ status")),
                              data = subset(unique_rans, rescue_country == only_big[i])))
    country <- only_big[i]
    model <- "Bivariate"
    outcome <- outcomes[j]
    estimate_contact <- models_list$coefficients[2]
    estimate_resister <- models_list$coefficients[3]
    std_error_contact <- models_list$coefficients[2,2]
    std_error_resister <- models_list$coefficients[3,2]
    models_df_temp <- data.frame(country = country, model = model, outcome = outcome, estimate_contact = estimate_contact, estimate_resister = estimate_resister, std_error_contact = std_error_contact, std_error_resister = std_error_resister)
    models_df <- rbind(models_df, models_df_temp)
    
    # Models with locality FE
    models_list <- summary(lm(as.formula(paste(outcomes[j],"~ status",
                                               "+ as.factor(rescue_loc)")),
                              data = subset(unique_rans, rescue_country == only_big[i])))
    country <- only_big[i]
    model <- "Locality FE"
    outcome <- outcomes[j]
    estimate_contact <- models_list$coefficients[2]
    estimate_resister <- models_list$coefficients[3]
    std_error_contact <- models_list$coefficients[2,2]
    std_error_resister <- models_list$coefficients[3,2]
    models_df_temp <- data.frame(country = country, model = model, outcome = outcome, estimate_contact = estimate_contact, estimate_resister = estimate_resister, std_error_contact = std_error_contact, std_error_resister = std_error_resister)
    models_df <- rbind(models_df, models_df_temp)
  }
  print(i)
}

models_df <- models_df %>% 
  mutate(ci_hi_95_contact = estimate_contact + 1.96 * std_error_contact,
         ci_lo_95_contact = estimate_contact - 1.96 * std_error_contact,
         ci_hi_90_contact = estimate_contact + 1.645 * std_error_contact,
         ci_lo_90_contact = estimate_contact - 1.645 * std_error_contact,
         ci_hi_95_resister = estimate_resister + 1.96 * std_error_resister,
         ci_lo_95_resister = estimate_resister - 1.96 * std_error_resister,
         ci_hi_90_resister = estimate_resister + 1.645 * std_error_resister,
         ci_lo_90_resister = estimate_resister - 1.645 * std_error_resister)

models_df <- models_df %>%
  gather(key, value, -country, -outcome, -model) %>% # turn everything into long df format
  tidyr::extract(col = key, into = c("statistic", "hh_status"), regex = "([\\w*\\d*_]*)_([a-z]*)$") %>% # separate the key column into two identifying columns -- make sure to use "()" in regex argument
  spread(statistic, value) # spread again to wide

models_df$hh_status <- stringr::str_replace_all(models_df$hh_status, pattern = "^contact$", replacement = "Rescuer in contact with insurgents")
models_df$hh_status <- stringr::str_replace_all(models_df$hh_status, pattern = "^resister$", replacement = "Rescuer-insurgent")

models_df$outcome <- ifelse(models_df$outcome == "recruiting", "network", models_df$outcome)
models_df$outcome <- ifelse(models_df$outcome == "transfer", "smuggling", models_df$outcome)

# PLOT ---------------------------------------------------------------

models_df %>%
  ggplot(aes(x = reorder(outcome, -estimate), 
             y = estimate, 
             color = factor(model, levels = c("Bivariate", "Locality FE")), 
             shape = factor(model, levels = c("Bivariate", "Locality FE"))
  )) +
  geom_hline(aes(yintercept = 0), linetype = 2, color = "gray20") +
  geom_linerange(aes(ymin = ci_lo_95, ymax = ci_hi_95), 
                 position = position_dodge(width = 0.1),
                 size = 0.5) +
  geom_linerange(aes(ymin = ci_lo_90, ymax = ci_hi_90), 
                 position = position_dodge(width = 0.1),
                 size = 1) +
  geom_point(size = 1.75
             , position = position_dodge(width = 0.1)
  ) +
  facet_wrap(country~hh_status
             , ncol = 4
  ) +
  theme_bw() +
  theme(axis.text=element_text(size=8),
        # axis.text.y = element_text(size = 15),
        legend.title = element_blank(),
        legend.position = "bottom",
        panel.background = element_rect(fill = NA, color = "black")
  ) +
  labs(
    x = "Outcomes",
    y = "Estimates"
  ) +
  scale_color_viridis_d(option = "inferno",
                        begin = 0.2, end = 0.85) +
  coord_flip()

# SAVE ---------------------------------------------------------------

ggsave(
  "E2_LPM_mechs_EU.png",
  plot = last_plot(),
  path = "./00 SUBMITTED/00 APSR final/03 dataverse_online_appendix/figures",
  width = 23,
  height = 29,
  units = "cm",
  dpi = 300
)

